\subsection{Definition}
\begin{definition}
	Let \( (E, \mathcal E), (G, \mathcal G) \) be measurable spaces.
	A function \( f \colon E \to G \) is called \emph{\( \mathcal E \)-\( \mathcal G \)-measurable} if when \( A \in \mathcal G \), we have \( f^{-1}(A) \in \mathcal E \).
\end{definition}
Informally, the preimage of a measurable set under a measurable function is measurable.

If \( G = \mathbb R \) and \( \mathcal G = \mathcal B \), we can just say that \( f \colon (E, \mathcal E) \to G \) is measurable.
Moreover, if \( E \) is a topological space and \( \mathcal E = \mathcal B(E) \), we say \( f \) is Borel measurable.

Note that preimages \( f^{-1} \) commute with many set operations such as intersection, union, and complement.
This implies that \( \qty{f^{-1}(A) \mid A \in \mathcal G} \) is a \( \sigma \)-algebra over \( E \), and likewise, \( \qty{A \mid f^{-1}(A) \in \mathcal E} \) is a \( \sigma \)-algebra over \( G \).
Hence, if \( \mathcal A \) is a collection of subsets of \( G \) generating \( \mathcal G \) such that \( f^{-1}(A) \in \mathcal E \) for all \( A \in \mathcal A \), the class \( \qty{A \mid f^{-1}(A) \in \mathcal E} \) is a \( \sigma \)-algebra that contains \( \mathcal A \) and hence that contains \( \mathcal G \).
In particular, it suffices to check \( f^{-1}(A) \in \mathcal E \) for all elements of a generator to conclude that \( f \) is measurable.

If \( f \colon (E, \mathcal E) \to \mathbb R \), the collection \( \mathcal A = \qty{(-\infty,y] \colon y \in \mathbb R} \) generates \( \mathcal B \) as is shown on the first example sheet.
Hence \( f \) is measurable whenever \( f^{-1}((-\infty,y]) = \qty{x \in E \mid f(x) \leq y} \in \mathcal E \) for all \( y \in \mathbb R \).

If \( E \) is a topological space and \( \mathcal E = \mathcal B(E) \), then if \( f \colon E \to \mathbb R \) is continuous, the preimages of open sets \( B \) are open, and hence Borel sets.
The open sets in \( \mathbb R \) generate the \( \sigma \)-algebra \( \mathcal B \).
Hence, continuous functions to the real line are measurable.
\begin{example}
	Consider the indicator function \( \symbb 1_A \) of a set \( A \).
	This is measurable if and only if \( A \) is measurable, or equivalently \( A \in \mathcal E \).
\end{example}
\begin{example}
	The composition of measurable functions is measurable.
	Measurability is preserved under addition, multiplication, countable infimum, countable supremum, countable limit inferior, countable limit superior, and some other operations.
	Note that given a collection of maps \( \qty{f_i \colon E \to (G,\mathcal G) \mid i \in I} \), we can make them all measurable by taking \( \mathcal E \) to be a large enough \( \sigma \)-algebra, for instance \( \sigma\qty(\qty{f_i^{-1}(A) \mid A \in \mathcal G, i \in I}) \).
\end{example}

\subsection{Monotone class theorem}
\begin{theorem}
	Let \( \mathcal A \) be a \( \pi \)-system that generates the \( \sigma \)-algebra \( \mathcal E \) over \( E \).
	Let \( \mathcal V \) be a vector space of bounded maps from \( E \) to \( \mathbb R \) such that
	\begin{enumerate}
		\item \( \symbb 1_E \in \mathcal V \);
		\item \( \symbb 1_A \in \mathcal V \) for all \( A \in \mathcal A \);
		\item if \( f \) is bounded and \( f_n \in \mathcal V \) are nonnegative functions that form an increasing sequence that converges pointwise to \( f \) on \( E \), then \( f \in \mathcal V \).
	\end{enumerate}
	Then \( \mathcal V \) contains all bounded measurable functions \( f \colon E \to \mathbb R \).
\end{theorem}
\begin{proof}
	Define \( \mathcal D = \qty{A \in \mathcal E \mid \symbb 1_A \in \mathcal V} \).
	This contains \( \mathcal A \) by hypothesis, as well as \( E \) itself.
	We show \( \mathcal D \) is a \( d \)-system, so that by Dynkin's lemma, \( \mathcal E = \mathcal D \).
	Indeed, \( E \in \mathcal D \) by assumption.
	For \( A \subseteq B \) and \( A, B \in \mathcal D \), we have \( \symbb 1_{B \setminus A} = \symbb 1_B - \symbb 1_A \) which is well-defined and lies in \( \mathcal V \) as \( \mathcal V \) is a vector space.
	Finally, if \( A_n \in \mathcal D \) increases to \( A \), then \( \symbb 1_{A_n} \) increases pointwise to \( \symbb 1_A \), which lies in \( \mathcal V \) by the third hypothesis.
	Hence \( \mathcal E = \mathcal D \).

	Let \( f \colon E \to \mathbb R \) be a bounded measurable function, which we will assume at first is nonnegative.
	We define
	\[ f_n = \sum_{j=0}^{n2^n} \frac{j}{2^n} \symbb 1_{A_{n_j}};\quad A_{n_j} = \begin{cases}
		\qty{x \in E \mid \frac{j}{2^n} < f(x) \leq \frac{j+1}{2^n}} = f^{-1}\left(\left(\frac{j}{2^n}, \frac{j+1}{2^n}\right]\right) \in \mathcal E & \text{if } j \neq n2^n \\
		\qty{x \in E \mid n < f(x)} = f^{-1}((n,\infty)) & \text{if } j = n2^n
	\end{cases} \]
	Since \( f \) is bounded, for \( n > \norm{f}_{\infty} \), we have \( f_n \leq f \leq f_n + 2^{-n} \).
	Hence \( \abs{f_n - f} \leq 2^{-n} \to 0 \).
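	Each \( f_n \) is a finite linear combination of indicator functions of sets in \( \mathcal E = \mathcal D \), so \( f_n \in \mathcal V \).
	The \( f_n \) are nonnegative and increase pointwise to \( f \), so by the third hypothesis, the limit of the \( f_n \), which is exactly \( f \), also lies in \( \mathcal V \).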

	Now, by separating any bounded measurable function \( f \) into its positive and negative parts, we find that these two parts lie in \( \mathcal V \), and so \( f \in \mathcal V \) as required.
\end{proof}

\subsection{Image measures}
\begin{definition}
	Let \( f \colon (E,\mathcal E) \to (G,\mathcal G) \) be a measurable function, and let \( \mu \) be a measure on \( (E, \mathcal E) \).
	Then the \emph{image measure} \( \nu = \mu \circ f^{-1} \) is obtained from assigning \( \nu(A) = \mu(f^{-1}(A)) \) for all \( A \in \mathcal G \).
\end{definition}
% TODO: Define right-continuous
\begin{lemma}
	Let \( g \colon \mathbb R \to \mathbb R \) be an increasing, right-continuous function, and set \( g(\pm\infty) = \lim_{z \to \pm \infty} g(z) \).
	On \( I = (g(-\infty), g(+\infty)) \) we define the \emph{generalised inverse}
	\[ f(x) = \inf \qty{y \in \mathbb R \mid x \leq g(y)} \]
	for \( x \in I \).
	Then \( f \) is increasing, left-continuous, and \( f(x) \leq y \) if and only if \( x \leq g(y) \) for all \( x \in I, y \in \mathbb R \).
\end{lemma}
\begin{remark}
	\( f \) and \( g \) form a Galois connection.
\end{remark}
\begin{proof}
	Let \( J_x = \qty{y \in \mathbb R \mid x \leq g(y)} \).
	Since \( x < g(+\infty) \), \( J_x \) is nonempty, and since \( x > g(-\infty) \), \( J_x \) is bounded below.
	Hence \( f(x) \) is a well-defined real number.
	If \( y \in J_x \), then \( y' \geq y \) implies \( y' \in J_x \) since \( g \) is increasing.
	Further, if \( y_n \) converges from the right to \( y \), and all \( y_n \in J_x \), we can take limits in \( x \leq g(y_n) \) to find \( x \leq \lim_n g(y_n) = g(y) \) since \( g \) is right-continuous.
	Hence \( y \in J_x \).
	So \( J_x = [f(x), \infty) \).
	Hence \( f(x) \leq y \iff x \leq g(y) \) as required.

	If \( x \leq x' \), we have \( J_x \supseteq J_{x'} \) by definition, so \( f(x) \leq f(x') \).
	Similarly, if \( x_n \) converges from the left to \( x \), we have \( J_x = \bigcap_n J_{x_n} \), so \( f(x_n) \to f(x) \) as \( x_n \to x \).
\end{proof}
\begin{theorem}
	Let \( g \colon \mathbb R \to \mathbb R \) be an increasing, right-continuous function, and set \( g(\pm\infty) = \lim_{z \to \pm \infty} g(z) \).
	Then there exists a unique Radon measure \( \mu_g \) on \( \mathbb R \) such that \( \mu_g((a,b]) = g(b) - g(a) \) for all \( a < b \).
	Further, all Radon measures can be obtained in this way.
\end{theorem}
\begin{proof}
	We will show that the generalised inverse \( f \) as defined above is measurable.
	For all \( z \in \mathbb R \), we find \( f^{-1}((-\infty,z]) = \qty{x \in I \colon f(x) \leq z} = \qty{x \in I \colon x \leq g(z)} = I \cap (-\infty,g(z)] \) which is measurable.
	Since \( \mathcal B \) is generated by sets of the form \( (-\infty,z] \), \( f \) is \( \mathcal B(I) \)-\(\mathcal B \) measurable as required.
	Therefore, the image measure \( \mu_g = \mu \circ f^{-1} \), where \( \mu \) is the Lebesgue measure on \( I \), exists.
	Then for any \( -\infty < a < b < \infty \), we have
	\begin{align*}
		\mu_g((a,b]) &= \mu(f^{-1}((a,b])) \\
		&= \mu(\qty{x \colon a < f(x) \leq b}) \\
		&= \mu(\qty{x \colon g(a) < x \leq g(b)}) \\
		&= g(b) - g(a)
	\end{align*}
	This uniquely determines \( \mu_g \) by the same argument as shown previously for the Lebesgue measure \( \mu \) on \( \mathbb R \).
	Since \( g \) maps into \( \mathbb R \), \( g(b) - g(a) \in \mathbb R \) so any compact set has finite measure as it is a subset of a closed bounded interval.

	Conversely, let \( \nu \) be a Radon measure on \( \mathbb R \).
	Define
	\[ g(y) = \begin{cases}
		\nu((0,y]) & \text{if } y \geq 0 \\
		-\nu((y,0]) & \text{if } y < 0
	\end{cases} \]
	This is an increasing function in \( y \), since \( \nu \) is a measure.
	Since we are using right-closed intervals, \( g \) is right-continuous.
	Finally, \( \nu((a,b]) = g(b) - g(a) \) which can be seen by case analysis and additivity of the measure \( \nu \).
	By uniqueness as before, this characterises \( \nu \) in its entirety.
\end{proof}
\begin{remark}
	Such image measures \( \mu_g \) are called \emph{Lebesgue--Stieltjes measures}, where \( g \) is the \emph{Stieltjes distribution}.
\end{remark}
\begin{example}
	The \emph{Dirac measure at \( x \)}, written \( \delta_x \), is defined by
	\[ \delta_x(A) = \begin{cases}
		1 & \text{if } x \in A \\
		0 & \text{otherwise}
	\end{cases} \]
	This has Stieltjes distribution \( g = \symbb 1_{[x,\infty)} \).
\end{example}

\subsection{Random variables}
\begin{definition}
	Let \( (\Omega, \mathcal F, \mathbb P) \) be a probability space, and \( (E, \mathcal E) \) be a measurable space.
	An \emph{\( E \)-valued random variable} \( X \) is an \( \mathcal F \)-\( \mathcal E \) measurable map \( X \colon \Omega \to E \).
	When \( E = \mathbb R \) or \( \mathbb R^d \) with the Borel \( \sigma \)-algebra, we simply call \( X \) a random variable or random vector.

	The \emph{law} or \emph{distribution} \( \mu_X \) of a random variable \( X \) is given by the image measure \( \mu_X = \mathbb P \circ X^{-1} \).
	When \( E \) is the real line, this measure has a distribution function
	\[ F_X(z) = \mu_X((-\infty, z]) = \mathbb P(X^{-1}((-\infty,z])) = \prob{\qty{\omega \in \Omega \mid X(\omega) \leq z}} = \prob{X \leq z} \]
	This uniquely determines \( \mu_X \) by the \( \pi \)-system argument given above.
\end{definition}
Using the properties of measures, we can show that any distribution function satisfies:
\begin{enumerate}
	\item \( F_X \) is increasing;
	\item \( F_X \) is right-continuous;
	\item \( \lim_{z \to -\infty} F_X(z) = \mu_X(\varnothing) = 0 \);
	\item \( \lim_{z \to \infty} F_X(z) = \mu_X(\mathbb R) = \prob{\Omega} = 1 \).
\end{enumerate}
Given any function \( F_X \) satisfying each property, we can obtain a random variable \( X \) on \( (\Omega, \mathcal F, \mathbb P) = ((0,1), \mathcal B((0,1)), \mu) \) by \( X(\omega) = \inf\qty{x \mid \omega \leq F_X(x)} \), and then \( F_X \) is the distribution function of \( X \).
\begin{definition}
	Consider a countable collection \( (X_i \colon (\Omega, \mathcal F, \mathbb P) \to (E, \mathcal E)) \) for \( i \in I \).
	This collection of random variables is called \emph{independent} if the \( \sigma \)-algebras \( \sigma\qty(\qty{X_i^{-1}(A) \colon A \in \mathcal E}) \) are independent.
\end{definition}
For \( (E, \mathcal E) = (\mathbb R, \mathcal B) \) we show on an example sheet that this is equivalent to the condition
\[ \prob{X_1 \leq x_1, \dots, X_n \leq x_n} = \prob{X_1 \leq x_1} \dots \prob{X_n \leq x_n} \]
for all finite subsets \( \qty{X_1, \dots, X_n} \) of the \( X_i \).

\subsection{Constructing independent random variables}
We now construct an infinite sequence of independent random variables with prescribed distribution functions on \( (\Omega, \mathcal F, \mathbb P) = ((0,1), \mathcal B, \mu) \) with \( \mu \) the Lebesgue measure on \( (0,1) \).
We start with Bernoulli random variables.

Any \( \omega \in (0,1) \) has a binary representation given by \( (\omega_i) \in \qty{0,1}^{\mathbb N} \), which is unique if we exclude infinitely long tails of zeroes from the binary representation.
We can then define the \emph{\( n \)th Rademacher function} \( R_n(\omega) = \omega_n \) which extracts the \( n \)th bit from the binary expansion.
Since each \( R_n \) can be given as the sum of \( 2^{n-1} \) indicator functions on measurable sets, they are measurable functions and are hence random variables.
Their distribution is given by \( \prob{R_n = 1} = \frac{1}{2} = \prob{R_n = 0} \), so we have constructed Bernoulli random variables with parameter \( \frac 12 \).
We show they are independent.
For a finite set \( (x_i)_{i=1}^n \),
\[ \prob{R_1 = x_1, \dots, R_n = x_n} = 2^{-n} = \prob{R_1 = x_1} \dots \prob{R_n = x_n} \]
Therefore, the \( R_n \) are all independent, so countable sequences of independent random variables indeed exist.
Now, take a bijection \( m \colon \mathbb N^2 \to \mathbb N \) and define \( Y_{nk} = R_{m(n,k)} \), which are independent random variables.
We can now define \( Y_n = \sum_k 2^{-k} Y_{nk} \).
This converges for all \( \omega \in \Omega \) since \( \abs{Y_{nk}} \leq 1 \), and these are still independent.
We show the \( Y_n \) are uniform random variables, by showing the distribution coincides with the uniform distribution on the \( \pi \)-system of intervals \( \left( \frac{i}{2^m}, \frac{i+1}{2^m} \right] \) for \( i = 0, \dots, 2^m - 1 \), which generates \( \mathcal B \).
\[ \prob{Y_n \in \left( \frac{i}{2^m}, \frac{i+1}{2^m} \right]} = \prob{\frac{i}{2^m} < \sum_k 2^{-k} Y_{nk} \leq \frac{i+1}{2^m}} = 2^{-m} = \mu\left( \frac{i}{2^m}, \frac{i+1}{2^m} \right] \]
Hence \( \mu_{Y_n} = \eval{\mu}_{(0,1)} \) by the uniqueness theorem, and so we have constructed an infinite sequence of independent uniform random variables \( Y_n \).
If \( F_n \) are probability distribution functions, taking the generalised inverse, we see that the \( F_n^{-1}(Y_n) \) are independent and have distribution function \( F_n \).

\subsection{Convergence of measurable functions}
\begin{definition}
	We say that a property defining a set \( A \in \mathcal E \) holds \emph{\( \mu \)-almost everywhere} if \( \mu(A^c) = 0 \) for a measure \( \mu \) on \( \mathcal E \).
	If \( \mu = \mathbb P \), we say a property holds \emph{\( \mathbb P \)-almost surely} or \emph{with probability one}, if \( \mathbb P(A) = 1 \).
\end{definition}
\begin{definition}
	If \( f_n \) and \( f \) are measurable functions on \( (E,\mathcal E,\mu) \), we say \emph{\( f_n \) converges to \( f \) \( \mu \)-almost everywhere} if \( \mu(\qty{x \in E \mid f_n(x) \nrightarrow f(x)}) = 0 \).
	We say \emph{\( f_n \) converges to \( f \) in \( \mu \)-measure} if for all \( \varepsilon > 0 \), \( \mu(\qty{x\in E \mid \abs{f_n(x) - f(x)} > \varepsilon}) \to 0 \) as \( n \to \infty \).
	For random variables, we say \( X_n \to X \) \emph{\( \mathbb P \)-almost surely} or \emph{in \( \mathbb P \)-probability}, written \( X_n \to^p X \), respectively.
	If \( X_n, X \) take values in \( \mathbb R \), we say \( X_n \to X \) \emph{in distribution}, written \( X_n \to^d X \), if \( \prob{X_n \leq x} \to \prob{X \leq x} \) at all points \( x \) at which the limiting distribution function \( x \mapsto \prob{X \leq x} \) is continuous.
\end{definition}
We can show that \( X_n \to^p X \implies X_n \to^d X \).
\begin{theorem}
	Let \( f_n \colon (E,\mathcal E,\mu) \to \mathbb R \) be measurable functions.
	Then,
	\begin{enumerate}
		\item if \( \mu(E) < \infty \), then \( f_n \to 0 \) almost everywhere implies that \( f_n \to 0 \) in measure;
		\item if \( f_n \to 0 \) in measure, \( f_{n_k} \to 0 \) almost everywhere on some subsequence.
	\end{enumerate}
\end{theorem}
\begin{proof}
	Let \( \varepsilon > 0 \).
	\[ \mu(\abs{f_n} \leq \varepsilon) \geq \mu\qty(\bigcap_{m \geq n} \qty{\abs{f_m} \leq \varepsilon}) \]
	The sequence \( \qty(\bigcap_{m \geq n} \qty{\abs{f_m} \leq \varepsilon})_n \) increases to \( \bigcup_n \bigcap_{m \geq n} \qty{\abs{f_m} \leq \varepsilon} \).
	So by countable additivity,
	\begin{align*}
		\mu\qty(\bigcap_{m \geq n} \qty{\abs{f_m} \leq \varepsilon}) &\to \mu\qty(\bigcup_n \bigcap_{m \geq n} \qty{\abs{f_m} \leq \varepsilon}) \\
		&= \mu\qty(\abs{f_n} \leq \varepsilon \text{ eventually}) \\
		&\geq \mu(\abs{f_n} \to 0) = \mu(E)
	\end{align*}
	Hence,
	\[ \liminf_n \mu(\abs{f_n} \leq \varepsilon) \geq \mu(E) \implies \limsup_n \mu(\abs{f_n} > \varepsilon) \leq 0 \implies \mu(\abs{f_n} > \varepsilon) \to 0 \]
	For the second part, by hypothesis, we have
	\[ \mu\qty(\abs{f_n} > \frac{1}{k}) < \varepsilon \]
	for sufficiently large \( n \).
	So choosing \( \varepsilon = \frac{1}{k^2} \), we see that along some subsequence \( n_k \) we have
	\[ \mu\qty(\abs{f_{n_k}} > \frac{1}{k}) \leq \frac{1}{k^2} \]
	Hence,
	\[ \sum_k \mu\qty(\abs{f_{n_k}} > \frac{1}{k}) < \infty \]
	So by the first Borel--Cantelli lemma, we have
	\[ \mu\qty(\abs{f_{n_k}} > \frac{1}{k} \text{ infinitely often}) = 0 \]
	so \( f_{n_k} \to 0 \) almost everywhere.
\end{proof}
\begin{remark}
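	Condition (i) is false if \( \mu(E) \) is infinite: consider \( f_n = \symbb 1_{(n,\infty)} \) on \( (\mathbb R,\mathcal B,\mu) \) with \( \mu \) the Lebesgue measure; then \( f_n \to 0 \) almost everywhere, but \( \mu(\abs{f_n} > \varepsilon) = \mu((n,\infty)) = \infty \) for all \( \varepsilon \in (0,1) \), so \( f_n \) does not converge to zero in measure.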
	Condition (ii) is false if we do not restrict to subsequences: consider independent events \( A_n \) such that \( \prob{A_n} = \frac{1}{n} \), then \( \symbb 1_{A_n} \to 0 \) in probability since \( \prob{\symbb 1_{A_n} > \varepsilon} = \prob{A_n} = \frac{1}{n} \to 0 \), but \( \sum_n \prob{A_n} = \infty \), and by the second Borel--Cantelli lemma, \( \prob{\symbb 1_{A_n} > \varepsilon \text{ infinitely often}} = 1 \), so \( \symbb 1_{A_n} \nrightarrow 0 \) almost surely.
\end{remark}
\begin{example}
	Let \( (X_n)_{n \in \mathbb N} \) be a sequence of independent exponential random variables distributed by \( \prob{X_1 \leq x} = 1 - e^{-x} \) for \( x \geq 0 \).
	Define \( A_n = \qty{X_n \geq \alpha \log n} \) where \( \alpha > 0 \), so \( \prob{A_n} = n^{-\alpha} \), and in particular, \( \sum_n \prob{A_n} < \infty \) if and only if \( \alpha > 1 \).
	By the Borel--Cantelli lemmas, we have for all \( \varepsilon > 0 \),
	\[ \prob{\frac{X_n}{\log n} \geq 1 \text{ infinitely often}} = 1;\quad \prob{\frac{X_n}{\log n} \geq 1 + \varepsilon \text{ infinitely often}} = 0 \]
	In other words, \( \limsup_n \frac{X_n}{\log n} = 1 \) almost surely.
\end{example}

\subsection{Kolmogorov's zero-one law}
Let \( (X_n)_{n \in \mathbb N} \) be a sequence of random variables.
We can define \( \mathcal T_n = \sigma(X_{n+1}, X_{n+2}, \dots) \).
Let \( \mathcal T = \bigcap_{n \in \mathbb N} \mathcal T_n \) be the \emph{tail \( \sigma \)-algebra}, which contains all events in \( \mathcal F \) that depend only on the limiting behaviour of \( (X_n) \).
\begin{theorem}
	Let \( (X_n)_{n \in \mathbb N} \) be a sequence of independent random variables.
	Let \( A \in \mathcal T \) be an event in the tail \( \sigma \)-algebra.
	Then \( \prob{A} = 1 \) or \( \prob{A} = 0 \).
	If \( Y \colon (\Omega,\mathcal T) \to (\mathbb R,\mathcal B) \) is measurable, it is constant almost surely.
\end{theorem}
\begin{proof}
	Define \( \mathcal F_n = \sigma(X_1, \dots, X_n) \) to be the \( \sigma \)-algebra generated by the first \( n \) elements of \( (X_n) \).
	This is also generated by the \( \pi \)-system of sets \( A = \qty(X_1 \leq x_1, \dots, X_n \leq x_n) \) for any \( x_i \in \mathbb R \).
	Note that the \( \pi \)-system of sets \( B = \qty(X_{n+1} \leq x_{n+1}, \dots, X_{n+k} \leq x_{n+k}) \), for arbitrary \( k \in \mathbb N \) and \( x_i \in \mathbb R \), generates \( \mathcal T_n \).
	By independence of the sequence, we see that \( \prob{A \cap B} = \prob{A} \prob{B} \) for all such sets \( A, B \), and so the \( \sigma \)-algebras \( \mathcal T_n, \mathcal F_n \) generated by these \( \pi \)-systems are independent.

	Let \( \mathcal F_\infty = \sigma(X_1, X_2, \dots) \).
	Then, \( \bigcup_n \mathcal F_n \) is a \( \pi \)-system that generates \( \mathcal F_\infty \).
	If \( A \in \bigcup_n \mathcal F_n \), we have \( A \in \mathcal F_{\overline n} \) for some \( \overline n \), so \( A \) is independent of every \( B \in \mathcal T_{\overline n} \).
	In particular, \( A \) is independent of every \( B \in \bigcap_n \mathcal T_n = \mathcal T \), since \( \mathcal T \subseteq \mathcal T_{\overline n} \).
	By uniqueness, \( \mathcal F_\infty \) is independent of \( \mathcal T \).

	Since \( \mathcal T \subseteq \mathcal F_\infty \), if \( A \in \mathcal T \), then \( A \) is independent of itself.
	So \( \prob{A} = \prob{A \cap A} = \prob{A}\prob{A} \), so \( \prob{A}^2 - \prob{A} = 0 \) as required.

	Finally, if \( Y \colon (\Omega,\mathcal T) \to (\mathbb R,\mathcal B) \) is measurable, the events \( \qty{Y \leq y} = Y^{-1}((-\infty,y]) \) lie in \( \mathcal T \), and so each has probability one or zero.
	Let \( c = \inf\qty{y \mid F_Y(y) = 1} \), so \( Y = c \) almost surely.
\end{proof}
